Problem set 5

Author

Herong Wang

Problem 1 - OOP Programming

a. define “rational” class

Code
# Define a constructor
setClass("rational",
         slots = c(a = "ANY",
                   b = "ANY"))

rational <- function(x1, x2) {
    return(new("rational", a = as.numeric(x1), b = as.numeric(x2)))

}

# Define a validator
setValidity("rational", function(object){
  newa <- as.numeric(object@a)
  newb <- as.numeric(object@b)
  if (is.na(newa)|is.na(newb)){
    stop("Please input valid number")
  }
  
  if (newb == 0) {
    stop("Denominator cannot be zero")
  } else if (newa%%1 != 0 | newb%%1 != 0){
    stop("Please input integer numerator or denominator")
  }
  return(TRUE)
})
Class "rational" [in ".GlobalEnv"]

Slots:
              
Name:    a   b
Class: ANY ANY
Code
# Define the show method

##' @title print a `rational`
##' @param object a `rational` object
##' @return the numerator and denominator of the `rational` object
setMethod("show", "rational",
  function(object) {
    cat("Numerator:", object@a)
    cat("\n")
    cat("Denominator:", object@b)
    return(invisible(object))
  }
)


# Create simplify function
# First I will write a function to calculate greatest common divisor 

##' @title report the greatest common divisor of two integers
##' @param a is the numerator 
##' @param b is the denominator
##' @return the greatest common divisor
gcb <- function(a, b){
  if(a == 0 | b == 0){
    c <- NULL
    return(c)
    next()
  }else if(a <= b){
    smaller <- a
  }else if(a > b){
    smaller <- b
  }
  
  for (i in 1:smaller){
    if(a%%i == 0 & b%%i == 0){
      c <- i
    }
  }
  
  return(c)
}


# Second I will create the simplify function

setGeneric("simplify",
           function(object) {
             standardGeneric("simplify")
           })
[1] "simplify"
Code
##' @title report simplest form a `rational` object
##' @param object a `rational` object
##' @return simplest form a `rational` object
setMethod("simplify", "rational",
          function(object) {
            c <- gcb(object@a, object@b)
            if(length(c) != 0){
              object@a <- object@a/c
              object@b <- object@b/c
            }
            return(object)
          })


# Create quotient function

setGeneric("quotient",
           function(object, ...) {
             standardGeneric("quotient")
           })
[1] "quotient"
Code
##' @title report quotient a `rational` object
##' @param object a `rational` object
##' @return quotient from a `rational` object
setMethod("quotient", "rational",
          function(object, digits = 2, ...) {
            newdig <- as.numeric(digits)
            if (is.na(newdig)){
              stop("Please input valid digits number")
            }else if (newdig%%1 != 0){
              stop("Please input valid digits number")
            }else if (newdig%%1 == 0){
              c <- object@a/object@b
              print(invisible(c), digits = newdig)
            }
            
          })


# Define addition; substraction, multiplication and division function

##' @title `rational` addition arithmetic.
##' @param e1 A `rational` object
##' @param e2 A `rational` object
##' @return A `rational` object
setMethod("+", signature(e1 = "rational",
                         e2 = "rational"),
          function(e1, e2) {
            return(invisible(simplify(rational(e1@a*e2@b + e2@a*e1@b, e1@b*e2@b))))
          })

##' @title `rational` subtraction arithmetic.
##' @param e1 A `rational` object
##' @param e2 A `rational` object
##' @return A `rational` object
setMethod("-", signature(e1 = "rational",
                         e2 = "rational"),
          function(e1, e2) {
            return(invisible(simplify(rational(e1@a*e2@b - e2@a*e1@b, e1@b*e2@b))))
          })

##' @title `rational` multiplication arithmetic.
##' @param e1 A `rational` object
##' @param e2 A `rational` object
##' @return A `rational` object
setMethod("*", signature(e1 = "rational",
                         e2 = "rational"),
          function(e1, e2) {
            x1 <- simplify(e1)
            x2 <- simplify(e2)
            return(invisible(simplify(rational(x1@a*x2@a , x1@b*x2@b))))
          })


##' @title `rational` division arithmetic.
##' @param e1 A `rational` object
##' @param e2 A `rational` object
##' @return A `rational` object
setMethod("/", signature(e1 = "rational",
                         e2 = "rational"),
          function(e1, e2) {
            x1 <- simplify(e1)
            x2 <- simplify(e2)
            return(invisible(simplify(rational(x1@a*x2@b , x1@b*x2@a))))
          })

b. evaluate code

Code
r1 <- rational(24, 6)
r2 <- rational(7, 230)
r3 <- rational(0, 4)

r1
Numerator: 24
Denominator: 6
Code
r3
Numerator: 0
Denominator: 4
Code
r1 + r2
Numerator: 927
Denominator: 230
Code
r1 - r2
Numerator: 913
Denominator: 230
Code
r1 * r2
Numerator: 14
Denominator: 115
Code
r1 / r2
Numerator: 920
Denominator: 7
Code
r1 + r3
Numerator: 4
Denominator: 1
Code
r1 * r3  
Numerator: 0
Denominator: 4
Code
r2 / r3
Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'simplify': Denominator cannot be zero
Code
quotient(r1)
[1] 4
Code
quotient(r2)
[1] 0.03
Code
quotient(r2, digits = 3) 
[1] 0.0304
Code
quotient(r2, digits = 3.14)
Error in .local(object, ...): Please input valid digits number
Code
quotient(r2, digits = "avocado")
Warning in .local(object, ...): NAs introduced by coercion
Error in .local(object, ...): Please input valid digits number
Code
q2 <- quotient(r2, digits = 3) 
[1] 0.0304
Code
q2
[1] 0.03043478
Code
quotient(r3)
[1] 0
Code
simplify(r1)
Numerator: 4
Denominator: 1
Code
simplify(r2)
Numerator: 7
Denominator: 230
Code
simplify(r3)
Numerator: 0
Denominator: 4

c. check malformed input to your constructor

Code
# if input 0 as denominator
rational(4,0) # will give error message that "Denominator cannot be zero"
Error in validityMethod(object): Denominator cannot be zero
Code
# if input a string
rational("1", "2") # if the string can be converted to number, it will not report any error
Numerator: 1
Denominator: 2
Code
rational("a", "b") # if not, it will give error message "Please input valid number"
Warning in initialize(value, ...): NAs introduced by coercion

Warning in initialize(value, ...): NAs introduced by coercion
Error in validityMethod(object): Please input valid number

Problem 2 - plotly

basic set up

Code
library(plotly)
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.2.3

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
Code
library(ggplot2)
library(tidyverse)
── Attaching packages
───────────────────────────────────────
tidyverse 1.3.2 ──
✔ tibble  3.2.1     ✔ dplyr   1.1.3
✔ tidyr   1.2.1     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 1.0.0
✔ purrr   0.3.5     
Warning: package 'tibble' was built under R version 4.2.3
Warning: package 'dplyr' was built under R version 4.2.3
Warning: package 'forcats' was built under R version 4.2.3
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks plotly::filter(), stats::filter()
✖ dplyr::lag()    masks stats::lag()
Code
df <- read.csv("C:/Users/herongw/Desktop/Umich_Phd/2024fall/STAT506/Problem_set5/df_for_ml_improved_new_market.csv")

a. Does the distribution of genre of sales across years appear to change

Code
# First I will clearn the data
genre <- df %>% 
  select(year, all_of(starts_with("Genre"))) %>% 
  mutate(Genre___Others2 = ifelse(Genre___Others==1 & Genre___Painting==1, 0, Genre___Others)) %>%   select(-Genre___Others) %>% 
  group_by(year) %>% 
  summarise(photography = sum(Genre___Photography), prints = sum(Genre___Print), sculpture = sum(Genre___Sculpture), painting = sum(Genre___Painting), others = sum(Genre___Others2))%>% 
  pivot_longer(photography:others, names_to = "genre", values_to = "sales") %>% 
  group_by(year) %>% 
  mutate(total = sum(sales), percent = sales/total)

# plot
p1 <- plot_ly(
  data = genre, x = ~year, y = ~percent, color = ~genre, type = "bar") |> 
  layout(barmode = "stack", title = "Genre of sales over years 1997-2012",
         yaxis = list(title = "Proportion of genre of sales"))

p1
Code
print("The plot suggests that the distribution of genre of sales chnage across years. Even thoght there is fluctuation, we can find an overall increasing trend in the proportion of sales in photography and overall decreasing trend in the proportion of sales in painting across years.")
[1] "The plot suggests that the distribution of genre of sales chnage across years. Even thoght there is fluctuation, we can find an overall increasing trend in the proportion of sales in photography and overall decreasing trend in the proportion of sales in painting across years."

b. sale prices over year and if genre affect it

Code
df2 <- df %>% 
  mutate(Genre___Others2 = ifelse(Genre___Others==1 & Genre___Painting==1, 0, Genre___Others)) %>%
  select(-Genre___Others) %>% 
  rename(Genre___Others = Genre___Others2) %>% 
  select(year, all_of(starts_with("Genre")), price_usd) %>% 
  pivot_longer(Genre___Photography:Genre___Others, names_to = "genre", values_to = "sales") %>% 
  filter(sales == 1) %>% 
  group_by(year) %>% 
  mutate(median.overall = median(price_usd, na.rm = TRUE)) %>% 
  group_by(year, genre) %>% 
  mutate(median.genre = median(price_usd, na.rm = TRUE)) %>% 
  ungroup() %>% 
  select(-price_usd) %>% 
  distinct(.keep_all = TRUE) %>% 
  mutate(genre = gsub("Genre___", "", genre))

p <- plot_ly (data = df2) |>
  add_trace(x = ~year, y = ~median.overall, type = "scatter", mode = "markers+lines") |>
  add_trace(x = ~year, y = ~median.genre, color = ~genre, type = "scatter", mode = "markers+lines")|> 
  layout(title = "Sale price over years 1997-2012")
p
Code
p2 <- p |> layout(updatemenus = list(
  list(
    y = 1,
    buttons = list(
      list(method = "update",
           args = list(list(visible =  list(TRUE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                       list(yaxis = list(title = "Overall median of sales price"))),
           label = "Overall"),

      list(method = "update",
           args = list(list(visible =  list(FALSE,TRUE,TRUE,TRUE,TRUE,TRUE)),
                       list(yaxis = list(title = "Median of sales price by genre"))),
           label = "By all genre"))
  )
))


p2
Code
print("Overall, the sale price decreased from 1997 to 2001 then increased from 2001 to 2008 and finally decrease after 2008. If we look at each genre, we can see the similar pattern for sculpture and photograph. The sale price for prints fluctuated and that for painting have a overall increasing trend.")
[1] "Overall, the sale price decreased from 1997 to 2001 then increased from 2001 to 2008 and finally decrease after 2008. If we look at each genre, we can see the similar pattern for sculpture and photograph. The sale price for prints fluctuated and that for painting have a overall increasing trend."

Problem 3 - data.table

Code
library(nycflights13)
Warning: package 'nycflights13' was built under R version 4.2.3
Code
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
Code
flights <- as.data.table(flights)
airports <- as.data.table(airports)


# Departure

a <- flights[,
        .(origin,
          numflight = .N, 
          mean_delay = mean(dep_delay, na.rm = TRUE),
          med_delay = median(dep_delay, na.rm = TRUE),
          faa = origin),
        by = origin
        ][numflight >= 10,
          .(faa, mean_delay, med_delay)
        ] 

merge(a, airports, by = "faa", all.x = TRUE)[order(-mean_delay),.(name, mean_delay, med_delay)]
                  name mean_delay med_delay
1: Newark Liberty Intl   15.10795        -1
2: John F Kennedy Intl   12.11216        -1
3:          La Guardia   10.34688        -3
Code
# Arrival

b <- flights[,
        .(dest,
          numflight = .N, 
          mean_delay = mean(arr_delay, na.rm = TRUE),
          med_delay = median(arr_delay, na.rm = TRUE),
          faa = dest),
        by = dest
        ][numflight >= 10,
          .(faa, mean_delay, med_delay)] 

c <- merge(b, airports, by = "faa", all.x = TRUE)[order(-mean_delay),.(mean_delay, med_delay, name = coalesce(name, faa))][order(-mean_delay), .(name, mean_delay, med_delay)]

print(c, n = nrow(c))
                                     name   mean_delay med_delay
  1:                Columbia Metropolitan  41.76415094      28.0
  2:                           Tulsa Intl  33.65986395      14.0
  3:                    Will Rogers World  30.61904762      16.0
  4:                 Jackson Hole Airport  28.09523810      15.0
  5:                        Mc Ghee Tyson  24.06920415       2.0
  6:               Dane Co Rgnl Truax Fld  20.19604317       1.0
  7:                        Richmond Intl  20.11125320       1.0
  8:        Akron Canton Regional Airport  19.69833729       3.0
  9:                      Des Moines Intl  19.00573614       0.0
 10:                   Gerald R Ford Intl  18.18956044       1.0
 11:                      Birmingham Intl  16.87732342      -2.0
 12:         Theodore Francis Green State  16.23463687       1.0
 13: Greenville-Spartanburg International  15.93544304      -0.5
 14:    Cincinnati Northern Kentucky Intl  15.36456376      -3.0
 15:            Savannah Hilton Head Intl  15.12950601      -1.0
 16:          Manchester Regional Airport  14.78755365      -3.0
 17:                          Eppley Afld  14.69889841      -2.0
 18:                               Yeager  14.67164179      -1.5
 19:                     Kansas City Intl  14.51405836       0.0
 20:                          Albany Intl  14.39712919      -4.0
 21:                General Mitchell Intl  14.16722038       0.0
 22:                       Piedmont Triad  14.11260054      -2.0
 23:               Washington Dulles Intl  13.86420212      -3.0
 24:               Cherry Capital Airport  12.96842105     -10.0
 25:              James M Cox Dayton Intl  12.68048606      -3.0
 26:     Louisville International Airport  12.66938406      -2.0
 27:                  Chicago Midway Intl  12.36422360      -1.0
 28:                      Sacramento Intl  12.10992908       4.0
 29:                    Jacksonville Intl  11.84483416      -2.0
 30:                       Nashville Intl  11.81245891      -2.0
 31:                Portland Intl Jetport  11.66040210      -4.0
 32:               Greater Rochester Intl  11.56064461      -5.0
 33:      Hartsfield Jackson Atlanta Intl  11.30011285      -1.0
 34:                Lambert St Louis Intl  11.07846451      -3.0
 35:                         Norfolk Intl  10.94909344      -4.0
 36:            Baltimore Washington Intl  10.72673385      -5.0
 37:                         Memphis Intl  10.64531435      -2.5
 38:                   Port Columbus Intl  10.60132291      -3.0
 39:                  Charleston Afb Intl  10.59296847      -4.0
 40:                    Philadelphia Intl  10.12719014      -3.0
 41:                  Raleigh Durham Intl  10.05238095      -3.0
 42:                    Indianapolis Intl   9.94043412      -3.0
 43:            Charlottesville-Albemarle   9.50000000      -5.0
 44:               Cleveland Hopkins Intl   9.18161129      -5.0
 45:        Ronald Reagan Washington Natl   9.06695204      -2.0
 46:                      Burlington Intl   8.95099602      -4.0
 47:                 Buffalo Niagara Intl   8.94595186      -5.0
 48:                Syracuse Hancock Intl   8.90392501      -5.0
 49:                          Denver Intl   8.60650021      -2.0
 50:                      Palm Beach Intl   8.56297210      -3.0
 51:                                  BQN   8.24549550      -1.0
 52:                             Bob Hope   8.17567568      -3.0
 53:       Fort Lauderdale Hollywood Intl   8.08212154      -3.0
 54:                          Bangor Intl   8.02793296      -9.0
 55:           Asheville Regional Airport   8.00383142      -1.0
 56:                                  PSE   7.87150838       0.0
 57:                      Pittsburgh Intl   7.68099053      -5.0
 58:                       Gallatin Field   7.60000000      -2.0
 59:                 NW Arkansas Regional   7.46572581      -2.0
 60:                           Tampa Intl   7.40852503      -4.0
 61:               Charlotte Douglas Intl   7.36031885      -3.0
 62:             Minneapolis St Paul Intl   7.27016886      -5.0
 63:                      William P Hobby   7.17618819      -4.0
 64:                         Bradley Intl   7.04854369     -10.0
 65:                     San Antonio Intl   6.94537178      -9.0
 66:                      South Bend Rgnl   6.50000000      -3.5
 67:     Louis Armstrong New Orleans Intl   6.49017497      -6.0
 68:                        Key West Intl   6.35294118       7.0
 69:                        Eagle Co Rgnl   6.30434783      -4.0
 70:                Austin Bergstrom Intl   6.01990875      -5.0
 71:                   Chicago Ohare Intl   5.87661475      -8.0
 72:                         Orlando Intl   5.45464309      -5.0
 73:               Detroit Metro Wayne Co   5.42996346      -7.0
 74:                        Portland Intl   5.14157973      -5.0
 75:                        Nantucket Mem   4.85227273      -3.0
 76:                      Wilmington Intl   4.63551402      -7.0
 77:                    Myrtle Beach Intl   4.60344828     -13.0
 78:    Albuquerque International Sunport   4.38188976      -5.5
 79:         George Bush Intercontinental   4.24079040      -5.0
 80:        Norman Y Mineta San Jose Intl   3.44817073      -7.0
 81:               Southwest Florida Intl   3.23814963      -5.0
 82:                       San Diego Intl   3.13916574      -5.0
 83:              Sarasota Bradenton Intl   3.08243131      -5.0
 84:            Metropolitan Oakland Intl   3.07766990      -9.0
 85:   General Edward Lawrence Logan Intl   2.91439222      -9.0
 86:                   San Francisco Intl   2.67289152      -8.0
 87:                                  SJU   2.52052659      -6.0
 88:                         Yampa Valley   2.14285714       2.0
 89:              Phoenix Sky Harbor Intl   2.09704733      -6.0
 90:            Montrose Regional Airport   1.78571429     -10.5
 91:                     Los Angeles Intl   0.54711094      -7.0
 92:               Dallas Fort Worth Intl   0.32212685      -9.0
 93:                           Miami Intl   0.29905978      -9.0
 94:                       Mc Carran Intl   0.25772849      -8.0
 95:                  Salt Lake City Intl   0.17625459      -8.0
 96:                           Long Beach  -0.06202723     -10.0
 97:                Martha\\\\'s Vineyard  -0.28571429     -11.0
 98:                  Seattle Tacoma Intl  -1.09909910     -11.0
 99:                        Honolulu Intl  -1.36519258      -7.0
100:                                  STT  -3.83590734      -9.0
101:            John Wayne Arpt Orange Co  -7.86822660     -11.0
102:                    Palm Springs Intl -12.72222222     -13.5
                                     name   mean_delay med_delay